###############################
#
#define settings
#
###############################
GET_SAMPLE_NAMES <- TRUE # if set to FALSE define it in the next line
#sample_names_user <- c()
unique <- params$unique # how many peptides should be at least necessary to identify a protein
# which types should be evaluated TRUE or FALSE could be used
RAZOR <- TRUE
UNIQUE <- TRUE
LFQ <- FALSE
# define path of the proteinGroups.txt file(s)
path = params$path
Filtering:
Potential.contaminant != "+" -> remove where
contaminants
Reverse != "+"-> remove where reverse =
+
Only.identified.by.site != "+" -> remove where
only identified by site = +
data[name] >= unique -> keep only
identifications which have greater or equal number (defined via variable
unique) of razor/unique peptides | typically
this is set to 1 or 2
Data analysis show the number of identified proteins and
the heatmap. In Summary the final results are visualized as
boxplot and a table with the summarized values is shown.
Filtering:
Potential.contaminant != "+" -> remove where
contaminants = +Reverse != "+"-> remove where reverse = +Only.identified.by.site != "+" -> remove where only
identified by site = +Then the log2of the LFQ intensities is calculated and
all -Inf values were substituted by NA using the following
command: lfq[lfq == -Inf] <- NA
Data analysis show the number of identified proteins and
the heatmap. In Summary the final results are visualized as
boxplot and a table with the summarized values is shown.
# load libraries
library(tidyverse)
library(dplyr)
library(pheatmap)
library(ggplot2)
library(reshape2)
library(knitr)
library(plotly)
# load functions
source("filtering_function.R")
print("loading sucessful")
## [1] "loading sucessful"
The txt-files of the defined folder are read in, processed and filtered automatically. This section shows the code, heatmaps and also some summary statistics.
infile <- read.csv(path, dec=".", sep="\t")
res_raw <- list(infile)
########## get sample names
if (GET_SAMPLE_NAMES == TRUE){
#get sample names
tmp <- colnames(res_raw[[1]])
tmp <- tmp[grep("Razor...unique.peptides.",tmp)]
tmp <- sub(".*Razor...unique.peptides.", "",tmp)
sample_names_raw <- tmp
rm(tmp)
#create sample names
sample_names_razor <- paste("Razor...unique.peptides.",sample_names_raw, sep="")
sample_names_unique <- paste("Unique.peptides.",sample_names_raw, sep="")
sample_names_lfq <- paste("LFQ.intensity.",sample_names_raw, sep="")
} else {
res_raw[[1]] <- res_raw[[1]] %>%
rename_with(~ paste("Razor_", sample_names_user, sep = ""),
starts_with("Razor...unique.peptides.")) %>%
rename_with(~ paste("Unique_", sample_names_user, sep = ""),
starts_with("Unique.peptides.")) %>%
rename_with(~ paste("LFQ_", sample_names_user, sep = ""),
starts_with("LFQ.intensity."))
# get column names
sample_names_razor <- colnames(res_raw[[i]][grep("Razor_",
colnames(res_raw[[i]]))])
sample_names_unique <- colnames(res_raw[[i]][grep("Unique_",
colnames(res_raw[[i]]))])
sample_names_lfq <- colnames(res_raw[[i]][grep("LFQ_",
colnames(res_raw[[i]]))])
}
########## create result data frame & result lists
# create data frame for razor
results_2gether_razor <- data.frame(matrix(ncol = length(sample_names_razor), nrow = 1))
colnames(results_2gether_razor) <- sample_names_razor
#create data frame for unique
results_2gether_unique <- data.frame(matrix(ncol = length(sample_names_unique), nrow = 1))
colnames(results_2gether_unique) <- sample_names_unique
#create data frame for lfq
results_2gether_lfq <- data.frame(matrix(ncol = length(sample_names_lfq), nrow = 1))
colnames(results_2gether_lfq) <- sample_names_lfq
#results lists
results_razor <- list()
results_unique <- list()
results_lfq <- list()
########## perform evaluation
if (RAZOR == TRUE) {
cat("\n")
cat("##","Razor","\n")
cat("\n")
data_raw <- as.data.frame(res_raw[[1]])
data <- filtering(data_raw)
fasta <- data$Fasta.headers
results <- data.frame(matrix(ncol = length(sample_names_razor), nrow = nrow(data)))
colnames(results) <- sample_names_razor
i <- 0
for (i in 1:length(sample_names_razor)){
name <- sample_names_razor[i]
tmp <- (data[name] >= unique)
results[i] <- tmp
cat(name,"<br/>",sum(tmp, na.rm = TRUE),"<br/>")
}
results_2gether_razor[1,] <- sapply(results,function(x)sum(x, na.rm = TRUE))
results$FASTA <- data$Fasta.headers
results_razor[[1]] <- results
cat("\n")
#create heatmaps
paletteLength <- 2
myColor <- colorRampPalette(c("navy", "white",
"firebrick3"))(paletteLength)
data4pheatmap <- results[,-ncol(results)]
data4pheatmap <- data4pheatmap*1
data4pheatmap[data4pheatmap == 0] <- NA
# remove NA rows
ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
data4pheatmap_clear <- data4pheatmap[!ind,]
data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
if (ncol(data4pheatmap_clear) < 2){
cat("\n")
cat("##"," No heatmap possible. Too less columns in the result
table.","\n")
} else {
pheatmap(data4pheatmap_clear,
legend_breaks = c(0,1),
color = myColor,
treeheight_row = 10,
angle_col ="45",
treeheight_col = 10,
legend = TRUE,
labels_row = rep("",nrow(data4pheatmap)),
labels_col = sample_names_raw)
cat("\n")
}
}
Razor…unique.peptides.E4_D2_2_Gesamt_01
16
Razor…unique.peptides.E4_D2_2_Gesamt_02
14
Razor…unique.peptides.E4_D2_3_Gesamt_01
20
Razor…unique.peptides.E4_D2_3_Gesamt_02
21
Razor…unique.peptides.E4_D2_Pool_20_B1_01
2
Razor…unique.peptides.E4_D2_Pool_20_B1_02
4
Razor…unique.peptides.E4_D2_Pool_20_B10_01
5
Razor…unique.peptides.E4_D2_Pool_20_B10_02
5
Razor…unique.peptides.E4_D2_Pool_20_B2_01
2
Razor…unique.peptides.E4_D2_Pool_20_B2_02
3
Razor…unique.peptides.E4_D2_Pool_20_B3_01
5
Razor…unique.peptides.E4_D2_Pool_20_B3_02
4
Razor…unique.peptides.E4_D2_Pool_20_B4_01
8
Razor…unique.peptides.E4_D2_Pool_20_B4_02
6
Razor…unique.peptides.E4_D2_Pool_20_B5_01
11
Razor…unique.peptides.E4_D2_Pool_20_B5_02
10
Razor…unique.peptides.E4_D2_Pool_20_B6_01
17
Razor…unique.peptides.E4_D2_Pool_20_B6_02
18
Razor…unique.peptides.E4_D2_Pool_20_B7_01
15
Razor…unique.peptides.E4_D2_Pool_20_B7_02
15
Razor…unique.peptides.E4_D2_Pool_20_B8_01
10
Razor…unique.peptides.E4_D2_Pool_20_B8_02
8
Razor…unique.peptides.E4_D2_Pool_20_B9_01
6
Razor…unique.peptides.E4_D2_Pool_20_B9_02
7
if (UNIQUE == TRUE) {
cat("\n")
cat("##","Unique","\n")
cat("\n")
data_raw <- as.data.frame(res_raw[[1]])
data <- filtering(data_raw)
results <- data.frame(matrix(ncol = length(sample_names_unique), nrow = nrow(data)))
colnames(results) <- sample_names_unique
i <- 0
for (i in 1:length(sample_names_unique)){
name <- sample_names_unique[i]
tmp <- (data[name] >= unique)
results[i] <- tmp
cat(name,"<br/>",sum(tmp, na.rm = TRUE),"<br/>")
}
results_2gether_unique[1,] <- sapply(results,function(x)sum(x, na.rm = TRUE))
results$FASTA <- data$Fasta.headers
results_unique[[1]] <- results
cat("\n")
#create heatmaps
paletteLength <- 2
myColor <- colorRampPalette(c("navy", "white",
"firebrick3"))(paletteLength)
data4pheatmap <- results[,-ncol(results)]
data4pheatmap <- data4pheatmap*1
data4pheatmap[data4pheatmap == 0] <- NA
# remove NA rows
ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
data4pheatmap_clear <- data4pheatmap[!ind,]
data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
if (ncol(data4pheatmap) < 2){
cat("\n")
cat("##"," No heatmap possible. Too less columns in the result
table.","\n")
} else {
pheatmap(data4pheatmap_clear,
legend_breaks = c(0,1),
color = myColor,
treeheight_row = 10,
angle_col ="45",
treeheight_col = 10,
legend = TRUE,
labels_row = rep("",nrow(data4pheatmap)),
labels_col = sample_names_raw)
cat("\n")
}
}
Unique.peptides.E4_D2_2_Gesamt_01
11
Unique.peptides.E4_D2_2_Gesamt_02
9
Unique.peptides.E4_D2_3_Gesamt_01
14
Unique.peptides.E4_D2_3_Gesamt_02
15
Unique.peptides.E4_D2_Pool_20_B1_01
0
Unique.peptides.E4_D2_Pool_20_B1_02
1
Unique.peptides.E4_D2_Pool_20_B10_01
3
Unique.peptides.E4_D2_Pool_20_B10_02
2
Unique.peptides.E4_D2_Pool_20_B2_01
1
Unique.peptides.E4_D2_Pool_20_B2_02
1
Unique.peptides.E4_D2_Pool_20_B3_01
2
Unique.peptides.E4_D2_Pool_20_B3_02
2
Unique.peptides.E4_D2_Pool_20_B4_01
4
Unique.peptides.E4_D2_Pool_20_B4_02
2
Unique.peptides.E4_D2_Pool_20_B5_01
6
Unique.peptides.E4_D2_Pool_20_B5_02
6
Unique.peptides.E4_D2_Pool_20_B6_01
12
Unique.peptides.E4_D2_Pool_20_B6_02
13
Unique.peptides.E4_D2_Pool_20_B7_01
9
Unique.peptides.E4_D2_Pool_20_B7_02
9
Unique.peptides.E4_D2_Pool_20_B8_01
7
Unique.peptides.E4_D2_Pool_20_B8_02
5
Unique.peptides.E4_D2_Pool_20_B9_01
4
Unique.peptides.E4_D2_Pool_20_B9_02
5
if (LFQ == TRUE) {
cat("\n")
cat("##","LFQ","\n")
cat("\n")
data_raw <- as.data.frame(res_raw[[1]])
data <- filtering(data_raw)
results <- data.frame(matrix(ncol = length(sample_names_lfq), nrow = nrow(data)))
colnames(results) <- sample_names_lfq
i <- 0
for (i in 1:length(sample_names_lfq)){
name <- sample_names_lfq[i]
tmp <- log2(data[name])
tmp[tmp == -Inf] <- NA
results[i] <- tmp
cat(name,"<br/>",sum(!is.na(tmp)),"<br/>")
}
results_2gether_lfq[1,] <- sapply(results,function(x)sum(!is.na(x)))
results$FASTA <- data$Fasta.headers
results_lfq[[1]] <- results
cat("\n")
#create heatmaps
paletteLength <- 50
myColor <- colorRampPalette(c("navy", "white",
"firebrick3"))(paletteLength)
data4pheatmap <- results[,-ncol(results)]
data4pheatmap <- data4pheatmap*1
data4pheatmap[data4pheatmap == 0] <- NA
# remove NA rows
ind <- apply(data4pheatmap, 1, function(x) all(is.na(x)))
data4pheatmap_clear <- data4pheatmap[!ind,]
data4pheatmap_clear[is.na(data4pheatmap_clear)] <- 0
if (ncol(data4pheatmap) < 2){
cat("\n")
cat("##"," No heatmap possible. Too less columns in the result
table.","\n")
} else {
pheatmap(data4pheatmap_clear,
color = myColor,
treeheight_row = 10,
angle_col ="45",
treeheight_col = 10,
legend = TRUE,
labels_row = rep("",nrow(data4pheatmap)),
labels_col = sample_names_raw)
cat("\n")
}
}
# show summary
if (RAZOR == TRUE){
cat("\n")
cat("##"," Summary Razor","\n")
print(kable(results_2gether_razor))
ggplot(melt(results_2gether_razor),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Razor")
}
| Razor…unique.peptides.E4_D2_2_Gesamt_01 | Razor…unique.peptides.E4_D2_2_Gesamt_02 | Razor…unique.peptides.E4_D2_3_Gesamt_01 | Razor…unique.peptides.E4_D2_3_Gesamt_02 | Razor…unique.peptides.E4_D2_Pool_20_B1_01 | Razor…unique.peptides.E4_D2_Pool_20_B1_02 | Razor…unique.peptides.E4_D2_Pool_20_B10_01 | Razor…unique.peptides.E4_D2_Pool_20_B10_02 | Razor…unique.peptides.E4_D2_Pool_20_B2_01 | Razor…unique.peptides.E4_D2_Pool_20_B2_02 | Razor…unique.peptides.E4_D2_Pool_20_B3_01 | Razor…unique.peptides.E4_D2_Pool_20_B3_02 | Razor…unique.peptides.E4_D2_Pool_20_B4_01 | Razor…unique.peptides.E4_D2_Pool_20_B4_02 | Razor…unique.peptides.E4_D2_Pool_20_B5_01 | Razor…unique.peptides.E4_D2_Pool_20_B5_02 | Razor…unique.peptides.E4_D2_Pool_20_B6_01 | Razor…unique.peptides.E4_D2_Pool_20_B6_02 | Razor…unique.peptides.E4_D2_Pool_20_B7_01 | Razor…unique.peptides.E4_D2_Pool_20_B7_02 | Razor…unique.peptides.E4_D2_Pool_20_B8_01 | Razor…unique.peptides.E4_D2_Pool_20_B8_02 | Razor…unique.peptides.E4_D2_Pool_20_B9_01 | Razor…unique.peptides.E4_D2_Pool_20_B9_02 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16 | 14 | 20 | 21 | 2 | 4 | 5 | 5 | 2 | 3 | 5 | 4 | 8 | 6 | 11 | 10 | 17 | 18 | 15 | 15 | 10 | 8 | 6 | 7 |
if (UNIQUE == TRUE){
cat("\n")
cat("##"," Summary Unique","\n")
print(kable(results_2gether_unique))
ggplot(melt(results_2gether_unique),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Unique")
}
| Unique.peptides.E4_D2_2_Gesamt_01 | Unique.peptides.E4_D2_2_Gesamt_02 | Unique.peptides.E4_D2_3_Gesamt_01 | Unique.peptides.E4_D2_3_Gesamt_02 | Unique.peptides.E4_D2_Pool_20_B1_01 | Unique.peptides.E4_D2_Pool_20_B1_02 | Unique.peptides.E4_D2_Pool_20_B10_01 | Unique.peptides.E4_D2_Pool_20_B10_02 | Unique.peptides.E4_D2_Pool_20_B2_01 | Unique.peptides.E4_D2_Pool_20_B2_02 | Unique.peptides.E4_D2_Pool_20_B3_01 | Unique.peptides.E4_D2_Pool_20_B3_02 | Unique.peptides.E4_D2_Pool_20_B4_01 | Unique.peptides.E4_D2_Pool_20_B4_02 | Unique.peptides.E4_D2_Pool_20_B5_01 | Unique.peptides.E4_D2_Pool_20_B5_02 | Unique.peptides.E4_D2_Pool_20_B6_01 | Unique.peptides.E4_D2_Pool_20_B6_02 | Unique.peptides.E4_D2_Pool_20_B7_01 | Unique.peptides.E4_D2_Pool_20_B7_02 | Unique.peptides.E4_D2_Pool_20_B8_01 | Unique.peptides.E4_D2_Pool_20_B8_02 | Unique.peptides.E4_D2_Pool_20_B9_01 | Unique.peptides.E4_D2_Pool_20_B9_02 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | 9 | 14 | 15 | 0 | 1 | 3 | 2 | 1 | 1 | 2 | 2 | 4 | 2 | 6 | 6 | 12 | 13 | 9 | 9 | 7 | 5 | 4 | 5 |
if (LFQ == TRUE){
cat("\n")
cat("##"," Summary LFQ","\n")
print(kable(results_2gether_lfq))
ggplot(melt(results_2gether_lfq),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "LFQ")
}
# save results
results_final <- data.frame(results_2gether_razor,results_2gether_unique,results_2gether_lfq)
# get rid of NA columns
results_final <- results_final[,colSums(is.na(results_final))<nrow(results_final)]
write.csv2(results_final, file = paste(params$pathRDS,"_summary.csv",sep=""))
saveRDS(results_razor, file = paste(params$pathRDS,"_Razor.RDS",sep=""))
saveRDS(results_unique, file = paste(params$pathRDS,"_Unique.RDS",sep=""))
saveRDS(results_lfq, file = paste(params$pathRDS,"_LFQ.RDS",sep=""))
p <- ggplot(melt(results_2gether_razor),aes(x = variable, y = value)) +
geom_boxplot() +
xlab("Sample name") +
scale_x_discrete(labels = sample_names_raw) +
theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Razor")
ggplotly(p)
dat <- results_razor[[1]]
yeasx <- grep("YEASX",dat$FASTA)
wheat <- grep("WHEAT",dat$FASTA)
# write human oder trica in a column
for (i in 1:nrow(dat)){
if (grepl("YEASX",dat$FASTA[i])){
dat$org[i] <- "YEASX"
}
else if (grepl("WHEAT",dat$FASTA[i])){
dat$org[i] <- "WHEAT"
}
else {
dat$org[i] <- NA
}
}
dat$org[intersect(yeasx,wheat)] <- "YEASX/WHEAT"
dat4plot <- dat[,-c(ncol(dat)-1)]
dat4plot_long <- melt(dat4plot, id = c("org"))
tmp <- regmatches(dat4plot_long$variable,gregexpr("(?<=des.).*",dat4plot_long$variable,perl=TRUE))
tmp <- unlist(tmp)
dat4plot_long$variable <- tmp
p <- ggplot(data=dat4plot_long, aes(x=variable, y=as.numeric(value), fill=org)) +
stat_summary(fun.y = sum, geom = "bar", position = "dodge") +
xlab("sample name") +
ylab("counts") +
theme(axis.text.x = element_text(angle = 90))
ggplotly(p)
for (i in 1:(ncol(dat)-2)){
cat("\n")
text <- paste(" Fasta Headers - Sample",sample_names_raw[i])
cat("##",text,"\n")
cat("\"n")
print(dat$fasta[which(dat[,i])])
}
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
“nNULL
## [1] "Time used for analysis: 0.6 minutes"